set more off
cap log close

*************************
*************************
***creates analysis dataset for first stage***
***Pollution Monitors****
*************************
*************************

***Work draws from Claudia's Wind/school file and Mike's distance file
***This revision of the first stage,



**************Globals********************

	global home "C:\Users\jaheisse\Dropbox\Research on Florida Wind Patters"

	global roadsetup "$home\roadsetup\samples"
	global output "$home\roadsetup\"
	global samples "$home\School wind pollution\schools downwind"


cap log using "$output\mk schools downwind loop JAH.log", replace

***Looping over one ID or another:
	foreach file in aadt18000 aadt25000 aadt48000 USH IS {  

display "`file'"
use "$roadsetup\schwind5mi_schroad1mi_schoolto`file'.dta", clear


*****************merge on closest road**********;


	merge m:1 ncessch using "$roadsetup\schoolto`file'.dta"
	tab _merge
	
*note, the using data has observations we dropped either from being more
*than half a mile from a highway or  observations we couldn't fill in wind data for

	drop if _merge==2
	drop _merge
	

***JAH 9/18/17: Replacing windvalue as missing if=0
rename windvalue windvalue1
rename im_windvalue_1dayout windvalueimp	
rename im_circmean_1dayout circmeanimp
	tab windvalue1 if _n<1000, missing
	replace windvalue1=. if windvalue1==0
	
*save "$roadsetup\schoolto`file'_towind.dta", replace

*use "$roadsetup\schoolto`file'_towind.dta", replace

* extract date from date time
	gen date=dofc(datetime)

	
************impute windvalue if missing values and calculate the circlear mean*****************
	
	
	sum windvalue* circmean* circmeanimp


* DES 7/20/17: Make a continuous measure of treatment based on windvalue
	gen windtreat= abs(angle_degrees-windvalueimp)   /*8/7/17: updated to windvalue1 to windvalueimp */
	label var windtreat "Downwind intensity, windvalue"
	replace windtreat = 360 - windtreat if windtreat>180 
	*sum windtreat angle_degrees windvalue1
* order for ease in spot check
	order angle_degrees windvalue1 windvalueimp windtreat 
* now normalize so that it is between 0 and 1 with 1 treated and 0 not
	sum windtreat
	replace windtreat = 1-(windtreat/180)
	replace windtreat=. if mi_to_nid>1
* JAH 1/23/18: Make it zero for 180 degrees upwind, then 0-1 for downwind
* JAH 3/14/18: Make it "windtreatnew" inctead of replacing circtreat 
	*new code:
	gen windtreatnew=.	
	replace windtreatnew = 0 if windtreat<=.5
	replace windtreatnew = (windtreat-.5)*2 if windtreat>.5&windtreat!=.
	sum windtreat windtreatnew
	

* DES 7/20/17: Make a continuous measure of treatment based on circmean
	gen circtreat= abs(angle_degrees-circmeanimp)
	label var circtreat "Downwind intensity, circmean"
* JAH 1/23/18: Make it zero for 180 degrees upwind, then 0-1 for downwind
	replace circtreat = 360 - circtreat if circtreat>180 
	replace circtreat = 1-(circtreat/180)
	replace circtreat = . if mi_to_nid>1
	*new code:
* JAH 3/14/18: Make it "circtreatnew" inctead of replacing circtreat 
	gen circtreatnew=.	
	replace circtreatnew = 0 if circtreat<=.5
	replace circtreatnew = (circtreat-.5)*2 if circtreat>.5&circtreat!=.
	
	sum windtreat windtreatnew circtreat circtreatnew
	
*********additional data clean up: making month, hour, site-month-hour variable*****************
	egen school_closest = group(ncessch)
	drop month hour
	gen month=month(date)
	gen hour=hh(datetime)
	gen sitemonthhour=school_closest*10000+month*100+hour
	gen sitemonth = school_closest*100+month
	order date datetime month hour

************create down wind variables********************************	
	
	gen downwind=0 if windvalueimp~=. & angle_degrees~=.
	label var downwind "Downwind of highway"
	replace downwind=1 if abs(angle_degrees-windvalueimp)<45   /* DES 8/8/17: I changed this to windimp to capture second/third nearest windvalues */
	replace downwind=1 if abs(angle_degrees-360-windvalueimp)<45&angle_degrees>315&windvalueimp<45  /*DES notes: for example if windvalue1 is zero and the
	bearing angle of the monitor is 315 degrees from the road than abs(315-0) >45 but this is still downwind */
	replace downwind=1 if abs(angle_degrees+360-windvalueimp)<45&angle_degrees<45&windvalueimp>315   /*DES notes: for example if windvalue1 is 316 and angle_degrees
	is 0 than this is downwind.  However, abs(angle_degrees-windvalue1) >45.   So, we need to add 360  */   	
	sum downwind circtreat windtreat

	*checks for inuitive consistency
	*bysort downwind: sum windtreat
	*bysort downwind: sum circtreat

*now create different values of downwind based on distance
	foreach i in 1 5 10 20 30 {
	gen downwind_`i'=0 if windvalueimp~=. & angle_degrees~=.
	label var downwind "Downwind and within `i' 10ths of mi of highway"
	replace downwind_`i'=1 if downwind==1 & mi_to_nid<=`i'/10
*code monitors outside of distance threshold as missing
	replace downwind_`i'=. if mi_to_nid>`i'/10
							  }
	sum downwind downwind_5 downwind_10 downwind_20 downwind_30 circtreat windtreat
* in other words if you cross the 360/0 in the distance between the two than
* you need to add 360 to the variable in the 1st qudrant 


************create up wind variables********************************	
	
	gen upwind=0 if windvalueimp~=. & angle_degrees~=.
	label var upwind "Upwind of highway"
	replace upwind=1 if abs(angle_degrees-windvalueimp)>135&abs(angle_degrees-windvalueimp)<225   /* JAH 2/22/18: has to be in the +/- 45 degrees from 180n in opposite direction */

	sum downwind upwind circtreat windtreat

	*checks for inuitive consistency
	*bysort downwind: sum windtreat
	*bysort downwind: sum circtreat

*now create different values of upwind based on distance
	foreach i in 1 5 10 20 30 {
	gen upwind_`i'=0 if windvalueimp~=. & angle_degrees~=.
	label var upwind "Upwind and within `i' 10ths of mi of highway"
	replace upwind_`i'=1 if upwind==1 & mi_to_nid<=`i'/10
*code monitors outside of distance threshold as missing
	replace upwind_`i'=. if mi_to_nid>`i'/10
							  }
	sum upwind* downwind downwind_5 downwind_10 downwind_20 downwind_30 circtreat windtreat

* in other words if you cross the 360/0 in the distance between the two than
* you need to add 360 to the variable in the 1st qudrant 


************************ create outcome variables based on pollutant***********************
*
	gen epaOzone = EPAvalue if pollutant=="Ozone"
	gen epaCO = EPAvalue if pollutant=="CO"
	gen epaNO = EPAvalue if pollutant=="NO"
	gen epaNO2 = EPAvalue if pollutant=="NO2"
	gen epaNOx = EPAvalue if pollutant=="NOx"
	gen epaNOy = EPAvalue if pollutant=="NOy"
	gen epaPM10 = EPAvalue if pollutant=="PM10"
	gen epaPM25 = EPAvalue if pollutant=="PM25"
	gen epaPM25_loc = EPAvalue if pollutant=="epaPM25_loc"
	gen epaSO2 = EPAvalue if pollutant=="SO2"
	

******************** create indicator for school day vs. non school day hours
*/
	gen schoolday = (hour>=7 & hour<=16)
	gen latnight = (hour>=22 & hour<=3)
	*tab hour schoolday



* saves an analysis dataset

*save "$samples\downwind_`file'", replace //missing uniqueID for schools>0.5 mi from highway	



*use "$samples\downwind_`file'", replace //missing uniqueID for schools>0.5 mi from highway	


if "schoolto`file'"=="schooltoaadt18000"{
local name aadt18
}

if "schoolto`file'"=="schooltoaadt48000"{
local name aadt48
}


if "schoolto`file'"=="schooltoaadt25000"{
local name aadt25
}


else if "schoolto`file'"=="schooltoUSH"{
local name USH
}
else if "schoolto`file'"=="schooltoIS"{
local name IS
}

display "`name'"

rename downwind_1 down_`name'_1 
rename downwind_5 down_`name'_5
rename upwind_1 up`name'_1
rename upwind_5 up`name'_5
rename circtreat circ`name'
rename windtreat wind`name'

rename mi_to_nid mi_to_nid_`name'
rename windvalueimp windvalueimp`name'
rename downwind_10 downwind_`name'_1mi


collapse down_`name'_1 down_`name'_5 up`name'_1 up`name'_5 circ`name' wind`name' mi_to_nid_`name' downwind_`name'_1mi windvalueimp`name' ncessch, by(uniqueID)
rename uniqueID school
gen year=2010

save "$roadsetup\collapsed_`file'.dta", replace		
}

************************************************************************************************************************************************************************************************************************************
***Now, do 5 closest stuff:

display "5 closest party"
	foreach type in aadt25k mjrhwy {  

*****************merge on closest road**********;
	if "`type'" == "mjrhwy" {
	use "$roadsetup\schwind5mi_schroad1mi_mjrrds_5nearest.dta", clear
	merge m:1 ncessch using "$roadsetup\schooltomajorrds_mjrrds_5closest.dta"
	tab _merge

	}
	else {
	use "$roadsetup\schwind5mi_schroad1mi_aadt25k_5nearest.dta", clear
	merge m:1 ncessch using "$roadsetup\schooltomajorrds_aadt25k_5closest.dta"
	tab _merge

	}
	
	
*note, the using data has observations we dropped either from being more
*than half a mile from a highway or  observations we couldn't fill in wind data for

	drop if _merge==2
	drop _merge

if "`type'" =="mjrhwy" {
	forv i=1/5 {
		gen keep=1 if strpos(ROUTE`i', "I")
		replace keep=1 if strpos(ROUTE`i', "US")
		replace angle_degrees`i'=. if keep !=1
		drop keep
		}
	}
	

***JAH 9/18/17: Replacing windvalue as missing if=0
rename windvalue windvalue1
rename im_windvalue_1dayout windvalueimp	
rename im_circmean_1dayout circmeanimp
	tab windvalue1 if _n<1000, missing
	replace windvalue1=. if windvalue1==0
	
	
	
*save "$roadsetup\schooltomajorrds_aadt_5closest_towind.dta", replace

*use  "$roadsetup\schooltomajorrds_aadt_5closest_towind.dta", replace

* extract date from date time
	gen date=dofc(datetime)

	
************impute windvalue if missing values and calculate the circlear mean*****************
	
	
	sum windvalue* circmean* circmeanimp

* DES 7/20/17: Make a continuous measure of treatment based on windvalue
* JAH 5/23/18: Do it for each of the 5 matches roads
forv i=1/5 {
	gen windtreat`i'= abs(angle_degrees`i'-windvalueimp)   /*8/7/17: updated to windvalue1 to windvalueimp */
	label var windtreat`i' "Downwind intensity, windvalue, site `i'"
	replace windtreat`i' = 360 - windtreat`i' if windtreat`i'>180 
	sum windtreat`i' angle_degrees`i' windvalue1

* order for ease in spot check
	order  windvalue1 windvalueimp angle_degrees`i' windtreat`i'
* now normalize so that it is between 0 and 1 with 1 treated and 0 not
	sum windtreat`i'
	replace windtreat`i' = 1-(windtreat`i'/180)
	replace windtreat`i'=. if mi_to_nid`i'>1
	
* JAH 1/23/18: Make it zero for 180 degrees upwind, then 0-1 for downwind
* JAH 3/14/18: Make it "windtreatnew" inctead of replacing circtreat 
	*new code:
	gen windtreatnew`i'=.	
	replace windtreatnew`i' = 0 if windtreat`i'<=.5
	replace windtreatnew`i' = (windtreat`i'-.5)*2 if windtreat`i'>.5&windtreat`i'!=.
	sum windtreat`i' windtreatnew`i'	
		
* DES 7/20/17: Make a continuous measure of treatment based on circmean
	gen circtreat`i'= abs(angle_degrees`i'-circmeanimp)
	label var circtreat`i' "Downwind intensity, circmean, site `i'"
* JAH 1/23/18: Make it zero for 180 degrees upwind, then 0-1 for downwind
	replace circtreat`i' = 360 - circtreat`i' if circtreat`i'>180 
	replace circtreat`i' = 1-(circtreat`i'/180)
	replace circtreat`i' = . if mi_to_nid`i'>1
	*new code:
* JAH 3/14/18: Make it "circtreatnew" inctead of replacing circtreat 
	gen circtreatnew`i'=.	
	replace circtreatnew`i' = 0 if circtreat`i'<=.5
	replace circtreatnew`i' = (circtreat`i'-.5)*2 if circtreat`i'>.5&circtreat`i'!=.
	}
	
	sum windtreat* windtreatnew* circtreat* circtreatnew*
	
*********additional data clean up: making month, hour, site-month-hour variable*****************
	egen school_closest = group(ncessch)
	drop month hour
	gen month=month(date)
	gen hour=hh(datetime)
	gen sitemonthhour=school_closest*10000+month*100+hour
	gen sitemonth = school_closest*100+month
	order date datetime month hour

************create down wind variables********************************	
forv i = 1/5 {	
	gen downwind`i'=0 if windvalueimp!=. & angle_degrees`i'!=.
	label var downwind`i' "Downwind of highway, site `i'"
	replace downwind`i'=1 if abs(angle_degrees`i'-windvalueimp)<45   /* DES 8/8/17: I changed this to windimp to capture second/third nearest windvalues */
	replace downwind`i'=1 if abs(angle_degrees`i'-360-windvalueimp)<45&angle_degrees`i'>315&windvalueimp<45  /*DES notes: for example if windvalue1 is zero and the
	bearing angle of the monitor is 315 degrees from the road than abs(315-0) >45 but this is still downwind */
	replace downwind`i'=1 if abs(angle_degrees`i'+360-windvalueimp)<45&angle_degrees`i'<45&windvalueimp>315   /*DES notes: for example if windvalue1 is 316 and angle_degrees
	is 0 than this is downwind.  However, abs(angle_degrees-windvalue1) >45.   So, we need to add 360  */   	
	
	*sum downwind`i' circtreat`i' windtreat`i'
		*checks for inuitive consistency
		*bysort downwind: sum windtreat
		*bysort downwind: sum circtreat

*now create different values of downwind based on distance
	foreach j in 1 4 5 10 20 30 {
	gen downwind`i'_`j'=0 if windvalueimp~=. & angle_degrees`i'~=.
	label var downwind`i'_`j' "Downwind and within `j' 10ths of mi of highway"
	replace downwind`i'_`j'=1 if downwind`i'==1 & mi_to_nid`i'<=`j'/10
*code monitors outside of distance threshold as missing
	replace downwind`i'_`j'=. if mi_to_nid`i'>`j'/10
							  }
	*sum downwind*  circtreat* windtreat*
* in other words if you cross the 360/0 in the distance between the two than
* you need to add 360 to the variable in the 1st qudrant 

************create up wind variables********************************	
	
	gen upwind`i'=0 if windvalueimp~=. & angle_degrees`i'~=.
	label var upwind`i' "Upwind of highway, site `i'"
	replace upwind`i'=1 if abs(angle_degrees`i'-windvalueimp)>135&abs(angle_degrees`i'-windvalueimp)<225   /* JAH 2/22/18: has to be in the +/- 45 degrees from 180n in opposite direction */

	*sum downwind* upwind* circtreat* windtreat*
		*checks for inuitive consistency
		*bysort downwind: sum windtreat
		*bysort downwind: sum circtreat

*now create different values of upwind based on distance
	foreach j in 1 4 5 10 20 30 {
		gen upwind`i'_`j'=0 if windvalueimp~=. & angle_degrees`i'~=.
		label var upwind`i'_`j' "Upwind and within `i' 10ths of mi of highway, site `i'"
		replace upwind`i'_`j'=1 if upwind`i'==1 & mi_to_nid`i'<=`i'/10
	*code monitors outside of distance threshold as missing
		replace upwind`i'_`j'=. if mi_to_nid`i'>`j'/10
		}						  
	}
	sum upwind* downwind*  circtreat* windtreat*

* in other words if you cross the 360/0 in the distance between the two than
* you need to add 360 to the variable in the 1st qudrant 


	gen schoolday = (hour>=7 & hour<=16)
	gen latnight = (hour>=22 & hour<=3)
	*tab hour schoolday

***grouping data: (JAH 5/30/18: Adding "|windvalueimp==." to make them missing for the collapse, if we don't have treatment data
	foreach j in 4 5 10 {
	*roadcount
		egen rdcnt_win`j'=rownonmiss(downwind*_`j' )
		replace rdcnt_win`j'=. if mi_to_nid1>`j'/10|windvalueimp==.

	*downwind of any road in the hour?
		egen dwind_cnt_win`j'=rowtotal(downwind*_`j' )
		replace dwind_cnt_win`j' =. if mi_to_nid1>`j'/10|windvalueimp==.

	*downwind of any road in the hour?
		egen dwind_any_win`j'=rowmax(downwind*_`j' )			
		replace dwind_any_win`j' =0 if dwind_cnt_win`j'==0
		replace dwind_any_win`j' =. if mi_to_nid1>`j'/10|windvalueimp==.
	}

* saves an analysis dataset

forv i=1/5 {
	rename AADT`i' AADT`i'_`type'
	rename mi_to_nid`i' mi_to_nid`i'_`type'
	}

foreach j in 4 5 10 {
	rename rdcnt_win`j' 	rdcnt_win`j'_`type'
	rename dwind_cnt_win`j' dwind_cnt_win`j'_`type'
	rename dwind_any_win`j' dwind_any_win`j'_`type'
	rename downwind1_`j' 	dwind_1_win`j'_`type'
	}
	
****Making a placebo:
forv i = 1/5 {	
	gen downwind`i'_placebo_`type'=0 if windvalueimp!=. & angle_degrees`i'!=.
	label var downwind`i'_placebo_`type' "Downwind of highway, site `i'"
	replace downwind`i'_placebo_`type'=1 if (angle_degrees`i'-windvalueimp)>45 & (angle_degrees`i'-windvalueimp)<135   // JAH: Blowing perpendicular
	}
	*downwind of any road in the hour?
		egen dwind_cnt_win4_placebo_`type'=rowtotal(downwind*_placebo_`type')
		replace dwind_cnt_win4_placebo_`type' =. if mi_to_nid1>4/10|windvalueimp==.
			
		egen dwind_any_win4_placebo_`type'=rowmax(downwind*_placebo_`type' )			
		replace dwind_any_win4_placebo_`type' =0 if dwind_cnt_win4_placebo_`type'==0
		replace dwind_any_win4_placebo_`type' =. if mi_to_nid1>4/10|windvalueimp==.
	 
	 rename dwind_any_win4_placebo_`type' placebo_any_`type'

*save "$samples\downwind_5closest_`type'", replace //missing uniqueID for schools>0.5 mi from highway	

*use "$samples\downwind_5closest_`type'", replace //missing uniqueID for schools>0.5 mi from highway	

collapse mi_to_nid* AADT* rdcnt_win* dwind_cnt_win* dwind_any_win* dwind_1_win* ncessch placebo_any_*, by(uniqueID ROUTE*)
rename uniqueID school
gen year=2010



save "$roadsetup\collapsed_5closest_`type'.dta", replace	
}
	
************************************************************************************************************************************************************************************************************************************

*now merge these together based on school

use "$roadsetup\collapsed_aadt48000.dta", replace	

*note we want to keep all even if not merged, if any school is treated keep in data


merge 1:1 ncessch using "$roadsetup\collapsed_aadt25000.dta"

tab _merge, missing
drop _merge  


merge 1:1 ncessch using "$roadsetup\collapsed_USH.dta"

tab _merge, missing

drop _merge


merge 1:1 ncessch using "$roadsetup\collapsed_aadt18000.dta"

tab _merge, missing
drop _merge




merge 1:1 ncessch using "$roadsetup\collapsed_IS.dta"
tab _merge, missing
drop _merge

merge 1:1 ncessch using "$roadsetup\collapsed_5closest_mjrhwy.dta"
tab _merge, missing
drop _merge

merge 1:1 ncessch using "$roadsetup\collapsed_5closest_aadt25k.dta"
tab _merge, missing
drop _merge


*create an indicator for downwind more than 60 percent of the time across any of the measures

gen down60anyM4 = .

foreach name in aadt25 aadt48 USH IS { 
gen down60`name'_4 = 1 if (down_`name'_5>=0.6) & (down_`name'_5~=.)  & (mi_to_nid_`name'<=0.4)
replace down60`name'_4 = 0 if down60`name'_4~=1  & (mi_to_nid_`name'<=0.4)
sum mi_to_nid_`name' if down60`name'_4 ==1
sum down_`name'_5 if down60`name'_4 ==1
tab down60`name'_4, missing
replace down60anyM4 = 1 if  down60`name'_4==1
}

*exclude only downwind of 18000 from treatment;
gen down60aadt18_4 = 1 if (down_aadt18_5>=0.6) & (down_aadt18_5~=.) & (mi_to_nid_aadt18<=0.4)
replace down60aadt18_4 = 0 if down60aadt18_4~=1 & mi_to_nid_aadt18<=0.4
tab down60aadt18_4, missing

replace down60anyM4 = 0 if  down60anyM4~=1 & down60aadt18_4==0 & (mi_to_nid_aadt25<=0.4 | mi_to_nid_aadt48<=0.4 | mi_to_nid_USH<=0.4 | mi_to_nid_IS<=0.4)

tab down60anyM4 down60aadt25_4, missing
tab down60anyM4 down60aadt48_4, missing
tab down60anyM4 down60USH_4, missing
tab down60anyM4 down60IS_4, missing

tab down60anyM4 down60aadt18_4, missing

***Downwind more than 60%, 5 nearest roads: 
gen down60_any_aadt25k_4=(dwind_any_win4_aadt25k>=.6)
	replace down60_any_aadt25k_4=. if mi_to_nid1_aadt25k>.4
	
gen down60_any_mjrhwy_4=(dwind_any_win4_mjrhwy>=.6)
	replace down60_any_mjrhwy_4=. if mi_to_nid1_mjrhwy>.4

gen down60_1_mjrhwy_4=(dwind_1_win4_mjrhwy>=.6)
	replace down60_1_mjrhwy_4=. if mi_to_nid1_mjrhwy>.4	
	replace down60_1_mjrhwy_4=0 if mi_to_nid1_mjrhwy>.4&mi_to_nid1_mjrhwy<1	

tab down60_any_aadt25k_4 down60_any_mjrhwy_4, m	
	
save "$samples\collapsed_All0629.dta", replace

tab placebo_any_mjrhwy dwind_any_win4_mjrhwy 

gen flag=1 if down60_1_mjrhwy_4==0&down60IS_4==1
* BEFORE: School 480062 was flagged as strange on 6/7/18
